bbdc473c3133edd8112134db5dd083fc96b286f0,src/main/java/com/datumbox/examples/Classification.java,Classification,main,#String[]#,57
Before Change
//Pipeline of this example: parse the data, normalize it, reduce dimensions with PCA,
//fit a SoftMaxRegression classifier, validate it, print the predictions and clean up.
//Order matters: every transformer is fitted on trainingDataset first and only afterwards
//applied to testingDataset.
//Declare the "test result" column as CATEGORICAL before the file is parsed
headerDataTypes.put("test result", TypeInference.DataType.CATEGORICAL);
//NOTE(review): "test result" is passed again here, presumably as the response (Y) column, and
//'\t', '"', "\r\n" look like delimiter, quote and record separator - confirm against parseCSVFile
Dataset trainingDataset = Dataset.Builder.parseCSVFile(fileReader, "test result", headerDataTypes, '\t', '"', "\r\n", dbConf);
//testingDataset is created from trainingDataset.copy() before any transformation is applied
Dataset testingDataset = trainingDataset.copy();
//Transform Dataset
//-----------------
//Normalize continuous variables
//The transformer is stored under the name "Diabetes"; PCA and the classifier below reuse that name
XMinMaxNormalizer dataTransformer = new XMinMaxNormalizer("Diabetes", dbConf);
//Fit the normalizer on trainingDataset and transform it in the same call, using default parameters
dataTransformer.fit_transform(trainingDataset, new XMinMaxNormalizer.TrainingParameters());
//Feature Selection
//-----------------
//Perform dimensionality reduction using PCA
PCA featureSelection = new PCA("Diabetes", dbConf);
PCA.TrainingParameters featureSelectionParameters = new PCA.TrainingParameters();
featureSelectionParameters.setMaxDimensions(trainingDataset.getVariableNumber()-1); //remove one dimension
featureSelectionParameters.setWhitened(false);
featureSelectionParameters.setVariancePercentageThreshold(0.99999995);
//Fit PCA on the already normalized trainingDataset and transform it in the same call
featureSelection.fit_transform(trainingDataset, featureSelectionParameters);
//Fit the classifier
//------------------
SoftMaxRegression classifier = new SoftMaxRegression("Diabetes", dbConf);
SoftMaxRegression.TrainingParameters param = new SoftMaxRegression.TrainingParameters();
param.setTotalIterations(200);
param.setLearningRate(0.1);
//Train on the normalized and PCA-transformed trainingDataset
classifier.fit(trainingDataset, param);
//Denormalize trainingDataset (optional)
dataTransformer.denormalize(trainingDataset);
//Use the classifier
//------------------
//Apply the same data transformations on testingDataset
dataTransformer.transform(testingDataset);
//Apply the same featureSelection transformations on testingDataset
featureSelection.transform(testingDataset);
//Get validation metrics on testingDataset; since it was created as a copy of trainingDataset,
//these metrics describe the training data, not a held-out set
SoftMaxRegression.ValidationMetrics vm = classifier.validate(testingDataset);
classifier.setValidationMetrics(vm); //store them in the model for future reference
//Denormalize testingDataset (optional)
dataTransformer.denormalize(testingDataset);
System.out.println("Results:");
//Iterating the dataset yields record ids; each id is used to fetch its Record
for(Integer rId: testingDataset) {
Record r = testingDataset.get(rId);
//Print the real response next to the predicted one for this record
System.out.println("Record "+rId+" - Real Y: "+r.getY()+", Predicted Y: "+r.getYPredicted());
}
System.out.println("Classifier Statistics: "+PHPfunctions.var_export(vm));
//Clean up
//--------
//Erase data transformer, feature selector and classifier.
dataTransformer.erase();
featureSelection.erase();
classifier.erase();
//Erase datasets.
trainingDataset.erase();
testingDataset.erase();
}
}
After Change
headerDataTypes.put("test result", TypeInference.DataType.CATEGORICAL);
trainingDataframe = Dataframe.Builder.parseCSVFile(fileReader, "test result", headerDataTypes, '\t', '"', "\r\n", dbConf);
}
catch(UncheckedIOException | IOException | URISyntaxException ex) {
throw new RuntimeException(ex);
}
//Remaining pipeline of this example: normalize the data, reduce dimensions with PCA,
//fit a SoftMaxRegression classifier, validate it, print the predictions and clean up.
//Order matters: every transformer is fitted on trainingDataframe first and only afterwards
//applied to testingDataframe.
//testingDataframe is created from trainingDataframe.copy() before any transformation is applied
Dataframe testingDataframe = trainingDataframe.copy();
//Transform Dataframe
//-----------------
//Normalize continuous variables
//The transformer is stored under the name "Diabetes"; PCA and the classifier below reuse that name
XMinMaxNormalizer dataTransformer = new XMinMaxNormalizer("Diabetes", dbConf);
//Fit the normalizer on trainingDataframe and transform it in the same call, using default parameters
dataTransformer.fit_transform(trainingDataframe, new XMinMaxNormalizer.TrainingParameters());
//Feature Selection
//-----------------
//Perform dimensionality reduction using PCA
PCA featureSelection = new PCA("Diabetes", dbConf);
PCA.TrainingParameters featureSelectionParameters = new PCA.TrainingParameters();
featureSelectionParameters.setMaxDimensions(trainingDataframe.xColumnSize()-1); //remove one dimension
featureSelectionParameters.setWhitened(false);
featureSelectionParameters.setVariancePercentageThreshold(0.99999995);
//Fit PCA on the already normalized trainingDataframe and transform it in the same call
featureSelection.fit_transform(trainingDataframe, featureSelectionParameters);
//Fit the classifier
//------------------
SoftMaxRegression classifier = new SoftMaxRegression("Diabetes", dbConf);
SoftMaxRegression.TrainingParameters param = new SoftMaxRegression.TrainingParameters();
param.setTotalIterations(200);
param.setLearningRate(0.1);
//Train on the normalized and PCA-transformed trainingDataframe
classifier.fit(trainingDataframe, param);
//Denormalize trainingDataframe (optional)
dataTransformer.denormalize(trainingDataframe);
//Use the classifier
//------------------
//Apply the same data transformations on testingDataframe
dataTransformer.transform(testingDataframe);
//Apply the same featureSelection transformations on testingDataframe
featureSelection.transform(testingDataframe);
//Get validation metrics on testingDataframe; since it was created as a copy of trainingDataframe,
//these metrics describe the training data, not a held-out set
SoftMaxRegression.ValidationMetrics vm = classifier.validate(testingDataframe);
classifier.setValidationMetrics(vm); //store them in the model for future reference
//Denormalize testingDataframe (optional)
dataTransformer.denormalize(testingDataframe);
System.out.println("Results:");
//entries() yields (record id, Record) pairs, so no separate lookup per id is needed
for(Map.Entry<Integer, Record> entry: testingDataframe.entries()) {
Integer rId = entry.getKey();
Record r = entry.getValue();
//Print the real response next to the predicted one for this record
System.out.println("Record "+rId+" - Real Y: "+r.getY()+", Predicted Y: "+r.getYPredicted());
}
System.out.println("Classifier Statistics: "+PHPfunctions.var_export(vm));
//Clean up
//--------
//Delete data transformer, feature selector and classifier.
dataTransformer.delete();
featureSelection.delete();
classifier.delete();
//Delete Dataframes.
trainingDataframe.delete();
testingDataframe.delete();
}
}